#clear environment
rm(list = ls())
options(warn=-1)
#loading libraries (must be installed before where necessary)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(network)
##
## 'network' 1.17.1 (2021-06-12), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
## get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
## is.directed, list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(centiserve)
## Loading required package: Matrix
library(tidygraph)
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
library(networkD3)
library(visNetwork)
library(htmlwidgets)
##
## Attaching package: 'htmlwidgets'
## The following object is masked from 'package:networkD3':
##
## JS
library(ggraph)
#Loading the dataset
ged211 <- read_csv("ged211.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## relid = col_character(),
## code_status = col_character(),
## conflict_name = col_character(),
## dyad_name = col_character(),
## side_a = col_character(),
## side_b = col_character(),
## source_article = col_character(),
## source_office = col_character(),
## source_date = col_character(),
## source_headline = col_character(),
## source_original = col_character(),
## where_coordinates = col_character(),
## where_description = col_character(),
## adm_1 = col_character(),
## adm_2 = col_character(),
## geom_wkt = col_character(),
## country = col_character(),
## region = col_character(),
## date_start = col_datetime(format = ""),
## date_end = col_datetime(format = "")
## # ... with 1 more columns
## )
## ℹ Use `spec()` for the full column specifications.
head(ged211)
First we need a usable dataframe
#Lets take each instance with a death toll over 0 and create a df with only side_a, side_b and total death toll
#remove type of violence =3 because civilians are not considered actors
over0 <- subset(ged211, best>0 & type_of_violence!=3)
over0 <- cbind.data.frame(over0$side_a,over0$side_b,over0$best)
colnames(over0) <- c("side_a","side_b","best")
#Grouping and totalling
gpd <- aggregate(over0$best, list(over0$side_a,over0$side_b), FUN=sum)
colnames(gpd) <- c("side_a","side_b","total")
gpd <- gpd %>% arrange(desc(gpd$total))
#Look at the top values
head(gpd)
Then we need to get all the nodes, which are actors listed in either side_a or side_b
#All actors listed in side_a
sides_a <- gpd %>%
distinct(side_a)
#All actors listed in side_b
sides_b <- gpd %>%
distinct(side_b)
colnames(sides_a) <- c("sides")
colnames(sides_b) <- c("sides")
#Defining the nodes
nodes <- full_join(sides_a, sides_b, by = "sides")
#Defining the edges
edges <- gpd %>%
left_join(nodes, by = c("side_a" = "sides"))
edges <- edges %>%
left_join(nodes, by = c("side_b" = "sides"))
Now we can create the network
#Bidirectional so multiple set to true
routes_network <- network(edges, vertex.attr = nodes, matrix.type = "edgelist",ignore.eval = FALSE, multiple=TRUE)
#Checking we have created a network
class(routes_network)
## [1] "network"
routes_network
## Network attributes:
## vertices = 1325
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = TRUE
## bipartite = FALSE
## total edges= 1212
## missing edges= 0
## non-missing edges= 1212
##
## Vertex attribute names:
## sides vertex.names
##
## Edge attribute names not shown
Now the network can be plotted
plot(routes_network, vertex.cex = 0.8, main="Network Structure")
Although this gives us a little bit of insight into the structure, it is not very meaningful by itself, so let’s work with an igraph
routes_igraph <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)
routes_igraph
## IGRAPH 2c39d74 DN-- 1325 1212 --
## + attr: name (v/c), total (e/n)
## + edges from 2c39d74 (vertex names):
## [1] Government of Syria ->Syrian insurgents
## [2] Government of Afghanistan->Taleban
## [3] Government of Eritrea ->Government of Ethiopia
## [4] Government of Iraq ->IS
## [5] Government of Sri Lanka ->LTTE
## [6] Government of Syria ->IS
## [7] Government of Ethiopia ->EPLF
## [8] Government of Ethiopia ->EPRDF
## + ... omitted several edges
Using this network, we can easily query information on whether two actors are in conflict
are.connected(routes_igraph,"Government of India","Government of Pakistan") #TRUE
## [1] TRUE
are.connected(routes_igraph,"Government of Syria","IS") #TRUE
## [1] TRUE
are.connected(routes_igraph,"Government of Syria","Government of Pakistan") #FALSE
## [1] FALSE
We can also display the network as an edge list for each actor
(as_adj_edge_list(routes_igraph))$`Government of Syria`
## + 4/1212 edges from 2c39d74 (vertex names):
## [1] Government of Syria->IS
## [2] Government of Syria->Syrian insurgents
## [3] Government of Syria->SDF
## [4] Government of Syria->PYD
routes_network
## Network attributes:
## vertices = 1325
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = TRUE
## bipartite = FALSE
## total edges= 1212
## missing edges= 0
## non-missing edges= 1212
##
## Vertex attribute names:
## sides vertex.names
##
## Edge attribute names not shown
V(routes_igraph)$all <- degree(routes_igraph, mode = "all")
#Which actors have more than 10 times the median number of edges (most interconnected vertices)?
V(routes_igraph)[V(routes_igraph)$all > 10*median(V(routes_igraph)$all)]
## + 15/1325 vertices, named, from 2c39d74:
## [1] Government of Afghanistan Government of Ethiopia
## [3] Government of Sudan Government of Pakistan
## [5] IS Government of India
## [7] Government of Philippines Government of DR Congo (Zaire)
## [9] Government of Myanmar (Burma) Government of Chad
## [11] Fulani Government of Mali
## [13] Borana SDF
## [15] PYD
What does the distribution of edges look like
#Minimum number of edges in network
min(degree(routes_igraph))
## [1] 1
#Maximum number of edges in network
max(degree(routes_igraph))
## [1] 101
#Boxplot with distr of edges
boxplot(V(routes_igraph)$all, outline=FALSE, main="Boxplot of Edges")
#Vertex degree distr
hist(V(routes_igraph)$all, breaks=100, main="Degree Distribution of Organized Violence",xlab="# of Edges")
degree_distribution(routes_igraph)
## [1] 0.000000000 0.724528302 0.129811321 0.055849057 0.029433962 0.018113208
## [7] 0.011320755 0.006792453 0.004528302 0.004528302 0.003773585 0.002264151
## [13] 0.001509434 0.000754717 0.001509434 0.000754717 0.000000000 0.000754717
## [19] 0.000754717 0.000754717 0.000000000 0.000000000 0.000000000 0.000000000
## [25] 0.000754717 0.000754717 0.000000000 0.000000000 0.000000000 0.000000000
## [31] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [37] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [43] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [49] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [55] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [61] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [67] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [73] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [79] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [85] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [91] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## [97] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000754717
#For degree_distribution a numeric vector of the same length as the maximum degree plus one. The first element is the relative frequency zero degree vertices, the second vertices with degree one, etc.
pie(degree_distribution(routes_igraph),main="Number of Edges")
#Number of vertices
gorder(routes_igraph)
## [1] 1325
#Number of edges
gsize(routes_igraph)
## [1] 1212
#Reading degrees of actors
head(degree(routes_igraph))
## Government of Syria Government of Afghanistan Government of Eritrea
## 4 12 3
## Government of Iraq Government of Sri Lanka Government of Ethiopia
## 9 3 13
Now lets look at the actors that are in conflict with the most other actors
#Dataframe with number of degrees, ordered
degs <- as.data.frame(degree(routes_igraph))
colnames(degs) <- c("Degree")
head(degs[order(-degs$Degree),])
## [1] 101 25 24 19 18 17
#Highest degrees
most_degs <- subset(degs,degs>12)
most_degs
#Lets take each instance with a death toll over 0 and create a df with only side_a, side_b and total death toll
#remove type of violence =3 because civilians are not considered actors
#Three empty dataframes to be filled later
densities <- data.frame("Year" = 1989:2020, "Density" = NA)
latoras <- data.frame("Year" = 1989:2020, "Closeness" = NA)
meandeg <- data.frame("Year" = 1989:2020, "Degrees" = NA)
vertices_cnt <- data.frame("Year" = 1989:2020, "Nodes" = NA)
edges_cnt <- data.frame("Year" = 1989:2020, "Edges" = NA)
#Loop that calculates scores for the above dataframes for each year
#https://www.rdocumentation.org/packages/centiserve/versions/1.0.0
for (i in densities$Year) {
over0 <- subset(ged211, best>0 & type_of_violence!=3 & year==i)
over0 <- cbind.data.frame(over0$side_a,over0$side_b,over0$best)
colnames(over0) <- c("side_a","side_b","best")
#Grouping and totalling
gpd <- aggregate(over0$best, list(over0$side_a,over0$side_b), FUN=sum)
colnames(gpd) <- c("side_a","side_b","total")
gpd <- gpd %>% arrange(desc(gpd$total))
#Look at the top values
head(gpd)
#All actors listed in side_a
sides_a <- gpd %>%
distinct(side_a)
#All actors listed in side_b
sides_b <- gpd %>%
distinct(side_b)
colnames(sides_a) <- c("sides")
colnames(sides_b) <- c("sides")
#Defining the nodes
nodes <- full_join(sides_a, sides_b, by = "sides")
#Defining the edges
edges <- gpd %>%
left_join(nodes, by = c("side_a" = "sides"))
edges <- edges %>%
left_join(nodes, by = c("side_b" = "sides"))
routes_igraph <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)
densities$Density[densities$Year==i] <- edge_density(routes_igraph, loops = FALSE)
latoras$Closeness[latoras$Year==i] <- closeness.latora(routes_igraph)
meandeg$Degrees[meandeg$Year==i] <-mean(degree(routes_igraph))
vertices_cnt$Nodes[meandeg$Year==i] <-length(V(routes_igraph))
edges_cnt$Edges[meandeg$Year==i] <-length(E(routes_igraph))
}
densities
latoras
meandeg
plot(densities, main="Network Density over Time")
plot(latoras, main="Network Closeness over Time") #sum of inversed distances to all other nodes instead of the inversed of the sum of distances to all other nodes
plot(meandeg, main="Mean Degrees over Time")
plot(vertices_cnt, main="Node Count over Time")
plot(edges_cnt, main="Edge Count over Time")
#Tidy and arrange
routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
routes_igraph_tidy <- as_tbl_graph(routes_igraph)
routes_tidy %>%
activate(edges) %>%
arrange(desc(total))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Edge Data: 211 x 3 (active)
## from to total
## <int> <int> <dbl>
## 1 1 127 20102
## 2 2 128 7633
## 3 3 129 4455
## 4 3 130 4362
## 5 4 131 3521
## 6 5 27 2330
## # … with 205 more rows
## #
## # Node Data: 255 x 1
## sides
## <chr>
## 1 Government of Afghanistan
## 2 Government of Azerbaijan
## 3 Jalisco Cartel New Generation
## # … with 252 more rows
routes_tidy
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 1 (active)
## sides
## <chr>
## 1 Government of Afghanistan
## 2 Government of Azerbaijan
## 3 Jalisco Cartel New Generation
## 4 Government of Syria
## 5 Government of Yemen (North Yemen)
## 6 Comando Vermelho
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
## from to total
## <int> <int> <dbl>
## 1 1 127 20102
## 2 2 128 7633
## 3 3 129 4455
## # … with 208 more rows
routes_tidy %>%
activate(nodes) %>%
mutate(degree = centrality_degree(mode = 'total'))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 2 (active)
## sides degree
## <chr> <dbl>
## 1 Government of Afghanistan 3
## 2 Government of Azerbaijan 1
## 3 Jalisco Cartel New Generation 7
## 4 Government of Syria 3
## 5 Government of Yemen (North Yemen) 3
## 6 Comando Vermelho 2
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
## from to total
## <int> <int> <dbl>
## 1 1 127 20102
## 2 2 128 7633
## 3 3 129 4455
## # … with 208 more rows
routes_tidy %>%
activate(nodes)%>%
mutate(degree = centrality_degree(mode = 'total')) %>%
ggraph('nicely') +
geom_node_point(aes(size = degree)) +
geom_edge_link()
routes_tidy %>%
mutate(centrality = centrality_authority()) %>%
ggraph(layout = 'kk') +
geom_edge_link() +
geom_node_point(aes(colour = centrality)) +
scale_color_continuous(guide = 'legend') +
theme_graph()
As we can see, the edges are not weighted
is.weighted(routes_igraph)
## [1] FALSE
So let’s weight by the total deaths in for conflict
#Tidy and arrange
#tidygraph
routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
routes_igraph_tidy <- as_tbl_graph(routes_igraph)
routes_tidy %>%
activate(edges) %>%
arrange(desc(total))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Edge Data: 211 x 3 (active)
## from to total
## <int> <int> <dbl>
## 1 1 127 20102
## 2 2 128 7633
## 3 3 129 4455
## 4 3 130 4362
## 5 4 131 3521
## 6 5 27 2330
## # … with 205 more rows
## #
## # Node Data: 255 x 1
## sides
## <chr>
## 1 Government of Afghanistan
## 2 Government of Azerbaijan
## 3 Jalisco Cartel New Generation
## # … with 252 more rows
routes_tidy %>%
activate(nodes) %>%
mutate(degree = centrality_degree(mode = 'total'))
## # A tbl_graph: 255 nodes and 211 edges
## #
## # A directed acyclic simple graph with 56 components
## #
## # Node Data: 255 x 2 (active)
## sides degree
## <chr> <dbl>
## 1 Government of Afghanistan 3
## 2 Government of Azerbaijan 1
## 3 Jalisco Cartel New Generation 7
## 4 Government of Syria 3
## 5 Government of Yemen (North Yemen) 3
## 6 Comando Vermelho 2
## # … with 249 more rows
## #
## # Edge Data: 211 x 3
## from to total
## <int> <int> <dbl>
## 1 1 127 20102
## 2 2 128 7633
## 3 3 129 4455
## # … with 208 more rows
head(E(routes_tidy)$total)
## [1] 20102 7633 4455 4362 3521 2330
routes_tidy <- set_edge_attr(routes_tidy, "weight", value= E(routes_tidy)$total)
is_weighted(routes_tidy)
## [1] TRUE
E(routes_tidy)[E(routes_tidy)$weight>0]
## + 211/211 edges from b57cdfb:
## [1] 1->127 2->128 3->129 3->130 4->131 5-> 27 3->132 6->133 7-> 92
## [10] 3->134 8-> 20 9->135 3->136 4-> 20 10->129 11-> 20 12-> 20 13->100
## [19] 14-> 20 3->137 15-> 20 16->103 17->138 18->113 8->117 19->139 20->103
## [28] 21->140 22->103 23-> 20 1-> 20 24-> 30 25->141 26->142 27->143 28->144
## [37] 29->145 30->146 31->147 32->148 6->149 25-> 31 22-> 20 33->150 25->151
## [46] 34-> 20 35->144 20->138 36->137 37->152 31->153 38->154 39->155 40->103
## [55] 41-> 39 32->156 42->157 43-> 92 44->158 3->159 14-> 49 45->160 46-> 42
## [64] 47->161 48->162 32-> 20 14-> 51 49-> 42 37->163 50->164 51->165 52->142
## [73] 14->166 53->167 16-> 20 51->168 54->169 55->142 56-> 95 57->170 53->171
## [82] 1->172 58->103 34->173 59->174 59->175 14->176 60->177 61->178 62->179
## + ... omitted several edges
nodes[c(652,130,6,14,240,14,653,654,37,285,655,656,657),]
## [1] NA "Santa Rosa de Lima Cartel"
## [3] "Comando Vermelho" "Government of DR Congo (Zaire)"
## [5] "Egbura Mozum" "Government of DR Congo (Zaire)"
## [7] NA NA
## [9] "Government of Ukraine" NA
## [11] NA NA
## [13] NA
nodes[1:10,]
## [1] "Government of Afghanistan" "Government of Azerbaijan"
## [3] "Jalisco Cartel New Generation" "Government of Syria"
## [5] "Government of Yemen (North Yemen)" "Comando Vermelho"
## [7] "Government of Somalia" "Government of Nigeria"
## [9] "Government of Ethiopia" "Juarez Cartel"
#Fraction of deaths due to actors in most conflicts and deadliest conflicts
top10actors <- subset(E(routes_tidy), V(routes_tidy)$sides %in% c("IS","Fulani","Government of India","Government of Chad","Government of Myanmar (Burma)","Government of DR Congo (Zaire)", "Government of Sudan","Government of Pakistan","Government of Mali","Government of Ethiopia"))
sum(top10actors$total)/(sum(E(routes_tidy)$total))
## [1] 0.08274523
#10 deadliest dyads
sum(E(routes_tidy)[1:10]$total)/(sum(E(routes_tidy)$total))
## [1] 0.687182
pie(top10actors$total)
pie(E(routes_tidy)[1:10]$total)
head(closeness.latora(routes_tidy, vids = V(routes_tidy))) #Latora centrality for disconnected graphs
## [1] 0.1518159541 0.0001310101 0.0382130330 0.3915847692 0.8908332205
## [6] 0.0065505620
Using this we can see what the distribution of closeness looks like
boxplot(closeness.latora(routes_tidy, vids = V(routes_tidy)), main="Closeness Distribution")
#library(ggraph)
#ggraph(routes_tidy, layout = "graphopt") + geom_node_point() + geom_edge_link(aes(width = "weight"), alpha = 0.8) + scale_edge_width(range = c(0.1, 3)) + geom_node_text(aes(label = sides), repel = TRUE) + labs(edge_width = "weight") + theme_graph()
Interactive Plot with visNetwork and D3forcenetworklibrary
Jesse Sadler. (2017, October 25). Introduction to Network Analysis with R. Retrieved from https://www.jessesadler.com/post/network-analysis-with-r/
Bradley, B. (2021, May 25). From igraph to visNetwork. Retrieved from https://towardsdatascience.com/from-igraph-to-visnetwork-7bc5a76fdeec
visIgraph(routes_igraph)
visIgraph(routes_igraph) %>% visOptions(nodesIdSelection = TRUE, highlightNearest = TRUE)
library(networkD3)
library(visNetwork)
#This finds the subclusters
sc <- cluster_walktrap(routes_igraph)
members <- membership(sc)
#Converting from igraph to a d3 network type
plt_d3 <- igraph_to_networkD3(routes_igraph, group = members)
#This creates the plot
pltint <- forceNetwork(Links = plt_d3$links, Nodes =plt_d3$nodes,
Source = 'source', Target = 'target', NodeID = 'name',Group = 'group', fontSize = 30, opacity = 1, zoom = TRUE)
pltint
#This saves the plot as an html file
saveWidget(pltint, "network.html")
readr: https://www.rdocumentation.org/packages/readr/versions/1.3.1 dplyr: https://www.rdocumentation.org/packages/dplyr/versions/0.7.8 ggplot2: https://ggplot2.tidyverse.org/
Inferring and Analysing the Network network: https://cran.r-project.org/web/packages/network/index.html igraph: https://igraph.org/r/ centiserve: https://www.centiserver.org/rpackage/ tidygraph: https://www.rdocumentation.org/packages/tidygraph/versions/1.2.0
Visualising the Network networkD3: https://www.rdocumentation.org/packages/networkD3/versions/0.4 visNetwork: https://www.rdocumentation.org/packages/visNetwork/versions/2.1.0 htmlwidgets: https://www.htmlwidgets.org/ ggraph: https://cran.r-project.org/web/packages/ggraph/index.html
Jesse Sadler. (2017, October 25). Introduction to Network Analysis with R. Retrieved from https://www.jessesadler.com/post/network-analysis-with-r/
Bradley, B. (2021, May 25). From igraph to visNetwork. Retrieved from https://towardsdatascience.com/from-igraph-to-visnetwork-7bc5a76fdeec